<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    
<meta charset="UTF-8">
<title>Controlling Stemming | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="stemming.html" title="Reducing Words to Their Root Form">
<link rel="prev" href="choosing-a-stemmer.html" title="Choosing a Stemmer">
<link rel="next" href="stemming-in-situ.html" title="Stemming in situ">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- The first script assigns a fresh empty array to the global dataLayer. The noscript
         iframe points at ns.html for container GTM-58RLH5 and is used only when scripting
         is disabled; it is hidden and has zero size. -->
    <script>dataLayer = [];</script><noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-58RLH5" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <!-- Loader: pushes a gtm.start event (with the current timestamp) onto dataLayer, then
         creates an async script element for gtm.js?id=GTM-58RLH5 and inserts it before the
         first script element in the document. The extra "l" query parameter is appended
         only when the data layer name is not 'dataLayer'. -->
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      // Reuse the existing global dataLayer array if one is already defined; otherwise create it.
      window.dataLayer = window.dataLayer || [];
      // gtag() queues its raw `arguments` object on dataLayer; it performs no other work itself.
      function gtag(){dataLayer.push(arguments);}
      // Queue the 'js' command with the current time.
      gtag('js', new Date());
      // Queue the 'config' command for the Analytics property UA-12395217-16.
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      // Qualtrics site-intercept sampler. The inner constructor takes (e, h, f, g):
      //   e - sampling percentage (100 means always load),
      //   h - sampling mode, "v" or "r" (see check()),
      //   f - name of the cookie that stores the sampling state,
      //   g - URL of the intercept script to load (shadows the outer `g` inside the constructor).
      (function(){var g=function(e,h,f,g){
      // get(a): return the value of cookie `a` from document.cookie, or null when it is absent.
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      // set(a, c): write cookie `a` with value `c`, path "/", expiring 6048E5 ms (7 days) from now.
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      // check(): decide whether the intercept script should load on this page view.
      // State is stored in cookie `f` as "mode:percent:counter".
      //  - No cookie and e == 100: return true without writing a cookie.
      //  - No cookie and e != 100: in mode "v", e is first replaced by 100 or 0 at random
      //    (100 with probability e/100); the state [h, e, 0] is then saved to the cookie.
      //  - A stored/computed percent equal to 100 returns true (loose ==, so the cookie
      //    string "100" also matches).
      //  - Mode "v" otherwise returns false.
      //  - Mode "r" increments the counter, saves it, and returns true only when the
      //    previous counter value was a multiple of floor(100 / percent).
      //  - Any other mode returns true.
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      // go(): when check() passes, append a script element with src `g` to document.body (if body exists).
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      // start(): run go() on the window "load" event, using attachEvent when addEventListener is unavailable.
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      // Instantiate with percentage 100 and mode "r", then start; any exception thrown here is swallowed.
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="languages.html">Dealing with Human Language</a></span>
»
<span class="breadcrumb-link"><a href="stemming.html">Reducing Words to Their Root Form</a></span>
»
<span class="breadcrumb-node">Controlling Stemming</span>
</div>
<div class="navheader">
<span class="prev">
<a href="choosing-a-stemmer.html">« Choosing a Stemmer</a>
</span>
<span class="next">
<a href="stemming-in-situ.html">Stemming in situ »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="controlling-stemming"></a>Controlling Stemming<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/230_Stemming/50_Controlling_stemming.asciidoc">edit</a>
</h2>
</div></div></div>
<p>Out-of-the-box stemming solutions are never perfect.  Algorithmic stemmers,
especially, will blithely apply their rules to any words they encounter,
perhaps conflating words that you would prefer to keep separate.  Maybe, for
your use case, it is important to keep <code class="literal">skies</code> and <code class="literal">skiing</code> as distinct words
rather than stemming them both down to <code class="literal">ski</code> (as would happen with the
<code class="literal">english</code> analyzer).</p>
<p>The <a href="/guide/en/elasticsearch/reference/2.4/analysis-keyword-marker-tokenfilter.html" class="ulink" target="_top"><code class="literal">keyword_marker</code></a> and
<a href="/guide/en/elasticsearch/reference/2.4/analysis-stemmer-override-tokenfilter.html" class="ulink" target="_top"><code class="literal">stemmer_override</code></a> token filters
allow us to customize the stemming process.</p>
<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="preventing-stemming"></a>Preventing Stemming<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/230_Stemming/50_Controlling_stemming.asciidoc">edit</a>
</h3>
</div></div></div>
<p>The <a class="xref" href="configuring-language-analyzers.html#stem-exclusion"><code class="literal">stem_exclusion</code></a> parameter for language analyzers (see
<a class="xref" href="configuring-language-analyzers.html" title="Configuring Language Analyzers">Configuring Language Analyzers</a>) allowed us to specify a list of words that
should not be stemmed.  Internally, these language analyzers use the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-keyword-marker-tokenfilter.html" class="ulink" target="_top"><code class="literal">keyword_marker</code> token filter</a>
to mark the listed words as <em>keywords</em>, which prevents subsequent stemming
token filters from touching those words.</p>
<p>For instance, we can create a simple custom analyzer that uses the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-porterstem-tokenfilter.html" class="ulink" target="_top"><code class="literal">porter_stem</code></a> token filter,
but prevents the word <code class="literal">skies</code> from being stemmed:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "no_stem": {
          "type": "keyword_marker",
          "keywords": [ "skies" ] <a id="CO161-1"></a><i class="conum" data-value="1"></i>
        }
      },
      "analyzer": {
        "my_english": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "no_stem",
            "porter_stem"
          ]
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO161-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">keywords</code> parameter can accept multiple words.</p>
</td>
</tr>
</table>
</div>
<p>Testing it with the <code class="literal">analyze</code> API shows that just the word <code class="literal">skies</code> has
been excluded from stemming:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">GET /my_index/_analyze?analyzer=my_english
sky skies skiing skis <a id="CO162-1"></a><i class="conum" data-value="1"></i></pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO162-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>Returns: <code class="literal">sky</code>, <code class="literal">skies</code>, <code class="literal">ski</code>, <code class="literal">ski</code></p>
</td>
</tr>
</table>
</div>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<a id="keyword-path"></a>
<p>While the language analyzers allow us only to specify an array of words in the
<code class="literal">stem_exclusion</code> parameter, the <code class="literal">keyword_marker</code> token filter also accepts a
<code class="literal">keywords_path</code> parameter that allows us to store all of our keywords in a
file. The file should contain one word per line, and must be present on every
node in the cluster. See <a class="xref" href="using-stopwords.html#updating-stopwords" title="Updating Stopwords">Updating Stopwords</a> for tips on how to update this
file.</p>
</div>
</div>
</div>

<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="customizing-stemming"></a>Customizing Stemming<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/230_Stemming/50_Controlling_stemming.asciidoc">edit</a>
</h3>
</div></div></div>
<p>In the preceding example, we prevented <code class="literal">skies</code> from being stemmed, but perhaps we
would prefer it to be stemmed to <code class="literal">sky</code> instead.  The
<a href="/guide/en/elasticsearch/reference/2.4/analysis-stemmer-override-tokenfilter.html" class="ulink" target="_top"><code class="literal">stemmer_override</code></a> token
filter allows us to specify our own custom stemming rules. At the same time,
we can handle some irregular forms like stemming <code class="literal">mice</code> to <code class="literal">mouse</code> and <code class="literal">feet</code>
to <code class="literal">foot</code>:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "custom_stem": {
          "type": "stemmer_override",
          "rules": [ <a id="CO163-1"></a><i class="conum" data-value="1"></i>
            "skies=&gt;sky",
            "mice=&gt;mouse",
            "feet=&gt;foot"
          ]
        }
      },
      "analyzer": {
        "my_english": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "custom_stem", <a id="CO163-2"></a><i class="conum" data-value="2"></i>
            "porter_stem"
          ]
        }
      }
    }
  }
}

GET /my_index/_analyze?analyzer=my_english
The mice came down from the skies and ran over my feet <a id="CO163-3"></a><i class="conum" data-value="3"></i></pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO163-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>Rules take the form <code class="literal">original=&gt;stem</code>.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO163-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">stemmer_override</code> filter must be placed before the stemmer.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO163-3"><i class="conum" data-value="3"></i></a></p>
</td>
<td align="left" valign="top">
<p>Returns <code class="literal">the</code>, <code class="literal">mouse</code>, <code class="literal">came</code>, <code class="literal">down</code>, <code class="literal">from</code>, <code class="literal">the</code>, <code class="literal">sky</code>,
<code class="literal">and</code>, <code class="literal">ran</code>, <code class="literal">over</code>, <code class="literal">my</code>, <code class="literal">foot</code>.</p>
</td>
</tr>
</table>
</div>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<p>Just as for the <code class="literal">keyword_marker</code> token filter, rules can be stored
in a file whose location should be specified with the <code class="literal">rules_path</code>
parameter.</p>
</div>
</div>
</div>

</div>
<div class="navfooter">
<span class="prev">
<a href="choosing-a-stemmer.html">« Choosing a Stemmer</a>
</span>
<span class="next">
<a href="stemming-in-situ.html">Stemming in situ »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<!-- Page scripts: jQuery, then the docs script, then an inline script that sets
     window.initial_state to an empty object.
     NOTE(review): initial_state is presumably read by docs.js; confirm before changing it. -->
<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
